from pymongo import MongoClient
from bs4 import BeautifulSoup
from pprint import pprint
import requests
import datetime
import json
import time
import re

# Browser-like request headers sent to edu.jobui.com.
# Long values are split across adjacent string literals purely for line
# length; Python joins them at compile time, so the sent bytes are unchanged.
header0 = {
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,image/apng,*/*;q=0.8'
    ),
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en,zh-CN;q=0.9,zh;q=0.8,en-US;q=0.7',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Host': 'edu.jobui.com',
    'Referer': 'http://edu.jobui.com/majors/zhuanke/',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/65.0.3325.181 Safari/537.36'
    ),
}


class POST():
    """Scrape edu.jobui.com and build a ``{major name: [job titles]}`` mapping.

    Constructing an instance immediately runs :meth:`scrapy` (network I/O)
    and keeps its result on ``self.mapping``.
    """

    BASE_URL = 'http://edu.jobui.com'
    INDEX_URL = BASE_URL + '/majors/benke/'
    # Text label that precedes the job list on a major's "company/" page
    # (roughly "positions one can work in"). Compiled once for all pages.
    JOB_LABEL = re.compile('可从事岗位')
    # Seconds to wait on each HTTP request; without it requests can block
    # forever on a stalled connection.
    TIMEOUT = 10

    def __init__(self, limit=5):
        """Run the scrape right away.

        Args:
            limit: Forwarded to :meth:`scrapy`.
        """
        self.mapping = self.scrapy(limit)

    def scrapy(self, limit=5):
        """Fetch the major index and each major's detail page.

        Args:
            limit: Maximum number of major links to follow from the index
                page. ``None`` follows every link. Defaults to 5, the
                value that used to be hard-coded.

        Returns:
            dict mapping each major's link text to the whitespace-split
            text of the element following the job label's parent. Majors
            whose page has no such label/element are reported on stdout
            and left out of the mapping.

        Raises:
            requests.RequestException: On connection failure or timeout.
        """
        mapping = {}

        resp = requests.get(self.INDEX_URL, headers=header0,
                            timeout=self.TIMEOUT)
        soup = BeautifulSoup(resp.text, 'lxml')

        for item in soup.select('.fl.width33 a')[:limit]:
            href = item.get('href')
            if not href:
                # Anchor without a target: there is no detail page to visit.
                continue

            # Send the same browser-like headers and use the same parser as
            # for the index page so both requests are handled consistently.
            detail = requests.get(self.BASE_URL + href + 'company/',
                                  headers=header0, timeout=self.TIMEOUT)
            soup2 = BeautifulSoup(detail.text, 'lxml')

            # Look the label up once; the job list is the next sibling of
            # the element that contains the label text.
            label = soup2.find(text=self.JOB_LABEL)
            jobs_node = label.parent.find_next_sibling() if label else None

            if jobs_node is not None:
                mapping[item.text] = jobs_node.get_text().split()
            else:
                print('-' * 10, item.text, '-' * 10)
                print('暂无匹配')

        return mapping


# Script entry: instantiating POST starts the scrape, because
# POST.__init__ calls scrapy() itself. The instance is not kept.
POST()
